/*
NOTES TO DATA ANALYSIS:
	1. Summary statistics: Outcomes, covariates, strata (by behavior and treatment arms)
		- for completed interviews
		- compare completed with uncompleted interviews
	2. Balance tests
	3. Attrition analysis
	4. Number of interviews by treatment arms
*/

* Start from an empty session, switch off paging, and close any log left open
clear all
set more off
cap log close

********************************************************************************

* Open the log for this run; the file name carries the current date and an
* existing file with the same name is overwritten
log using "$logfiles/04_analysis_descriptives_`c(current_date)'.log", replace

* Load smscovid_clean.dta and report how many observations it holds
use "$covidclean/smscovid_clean.dta", clear

count

***RESPONSE RATES
* Tabulations include missing values as their own category
tabulate respond, missing
tabulate age, missing
tabulate complete if respond==1 & consent==1 & age>=18, missing
tabulate call_status if respond==0 & age>=18, missing
* Detailed distributions of the attempts and duration variables
summarize attempts, detail
summarize duration if complete==1 & consent==1 & age>=18, detail
tabulate consent if respond==1, missing

*** Check for differential consent across pooled treatment
*   Everything between preserve and restore is temporary: the age filter and
*   the interaction variables are discarded once the data are restored.
	preserve

		keep if age>=18

		tabulate consent, missing //3% did not consent

		* Interact each treatment indicator with the two behavior dummies
		foreach arm in $treat_pool $treat_frames $treat_timings $treat_arms {
			foreach beh in sd hw {
				generate `arm'_`beh' = `arm'*behavior_`beh'
			}
		}

		* Consent on the pooled treatment interactions, robust standard errors
		regress consent treatment_pooled_sd treatment_pooled_hw, vce(robust)

		* Same specification with the listed fixed effects absorbed
		reghdfe consent treatment_pooled_sd treatment_pooled_hw, absorb(roundofinterview behavior_sd dayofinterview blockid enumerator) vce(robust)

	restore

***Analysis sample: consent given, aged 18 or above, treatment arm not missing
*  The numbers after each command are the counts noted by the authors.
*  NOTE(review): a missing age satisfies age>=18 in Stata, so such rows are kept - confirm none exist.
count //12757
keep if age>=18 & consent==1
keep if !missing(treatment_arm) //0
count //3964

*Administrative data on SMS delivery reports
tabulate delivered_count treatment_pooled, missing

	*Set the SMS delivery count to 0 for the control group
	replace delivered_count = 0 if treatment_pooled == 0
	bysort treatment_pooled: summarize delivered_count

	*Indicator for at least one delivered SMS; stays missing when the count is missing
	generate delivered = (delivered_count>0) if !missing(delivered_count)
	tabulate delivered treatment_pooled, missing
	label variable delivered "Received COVID-related SMS (Admin)"
	
	
***HOUSEKEEPING - GLOBALS

	***Stratification variable, entered as indicators in the balance regressions
	global strata stratum

	***Column titles for the summary tables
	global ctitles " "", Control, "", "", Treatment, "", "" \ "", Mean, SD, N, Mean, SD, N"

	***Font options passed to the table commands
	global font "basefont(footnotesize) statfont(footnotesize; footnotesize; footnotesize; footnotesize) rtitlfont(footnotesize; footnotesize; footnotesize; footnotesize) ctitlfont(footnotesize) notefont(scriptsize)"

	***Variable lists, one global per panel of the balance table
	global demo saran location_2 location_3 age male educ_3 educ_4 occ_1 occ_7 caste_2 caste_4 religion_1 religion_2 phone 
	global sms smsread smstrust smsfreq_1 smsfreq_4
	global health know_fever know_cough pregnancy_care child_care

	***Shorter variable labels, used as row titles in the table output
	label variable educ_3 "Finished secondary school"
	label variable educ_4 "More than secondary school"
	label variable occ_7 "Manual job"
	label variable smsfreq_1 "Did not read any SMS"
	label variable smsfreq_4 "Read SMS daily"
	label variable know_cough "Know: Cough"
	label variable pregnancy_care "Received Antenatal Care"
	label variable child_care "Child immunized"

********************************************************************************
***SUMMARY STATISTICS
********************************************************************************

	* For each panel, fill a matrix with one row per variable and six columns:
	* mean, SD and N for the first group of treatment_pooled (control), then
	* the same three statistics for the second group (treatment).
	foreach panel in demo sms health {

		local nvars : word count $`panel'
		matrix sumstat_`panel' = J(`nvars',6,.)

		local row = 0
		foreach v of varlist $`panel' {

			local ++row
			quietly mean `v', over(treatment_pooled)

			* Group g occupies columns 3g-2 (mean), 3g-1 (SD) and 3g (N)
			forvalues g = 1/2 {
				local cm = 3*`g' - 2
				local cs = 3*`g' - 1
				local cn = 3*`g'
				matrix sumstat_`panel'[`row',`cm'] = e(b)[1,`g']
				matrix sumstat_`panel'[`row',`cs'] = e(sd)[1,`g']
				matrix sumstat_`panel'[`row',`cn'] = e(_N)[1,`g']
			}

		}

		* Keep the formatted table in memory under the same name as the matrix
		frmttable, statmat(sumstat_`panel') replace tex fr sfmt(f,f,gc,f,f,gc) store(sumstat_`panel')

	}
	
	
	*SMS receipt and SMS count admin values, treatment group only
	*Each pair is a variable followed by the prefix of the locals that hold
	*its formatted mean, SD and N (del_* for receipt, del_c_* for the count)
		foreach spec in "delivered del" "delivered_count del_c" {
			local src  : word 1 of `spec'
			local stub : word 2 of `spec'

			quietly mean `src' if treatment_pooled==1
			local `stub'_mean : display %9.2f round(e(b)[1,1],0.01)
			local `stub'_sd : display %9.2f round(e(sd)[1,1],0.01)
			local `stub'_n : display %9.0fc e(_N)[1,1]
		}

********************************************************************************
***OTHER DESCRIPTIVES REPORTED IN MANUSCRIPT
********************************************************************************

// Calculating % that answer all questions
* Number of respondents that consented (and at least age 18)
count if consent == 1 & age > 17
local consented = r(N)
***** REPORTED IN MANUSCRIPT: DATA COLLECTION *****
dis "Number of respondents that consented to the survey: `consented'"

* Number of respondents that made it to the end of the survey (religion is the last question)
count if !mi(religion) & consent == 1 & age > 17
local completed_survey = r(N)
* The message prints the local that was just stored; the earlier name was never defined and printed blank
dis "Number of respondents that made it to the end of the survey: `completed_survey'"

* Percent that made it to the end of the survey, among those that consented
***** REPORTED IN MANUSCRIPT: DATA COLLECTION *****
* Displayed without quietly so that the reported figure reaches the log
dis `completed_survey'/`consented'

// Calculating percent of respondents that were assigned to receive five-day recall calls (vs. 3 day recall)
* Number of respondents that were called in the treatment groups
count if !mi(fivedayrecall) & !mi(know_sd)
local all_treated = r(N)
* The message prints the local that was just stored; the earlier name was never defined and printed blank
dis "Number of respondents that were called in the treatment groups: `all_treated'"

* Number of treatment group respondents that were assigned to be called after 5 days (instead of 3)
count if fivedayrecall==1 & !mi(know_sd)
local called_fiveday = r(N)
dis "Number of treatment group respondents that were assigned to be called after 5 days (instead of 3): `called_fiveday'"

* fraction of treatment group respondents that were assigned to be called after 5 days (instead of 3)
***** REPORTED IN MANUSCRIPT: HETEROGENEITY AND SPILLOVERS *****
dis `called_fiveday'/`all_treated'
		
		
		
		
********************************************************************************
***BALANCE TESTS
********************************************************************************
* Builds the balance table in three panels (demographics, SMS-related, health).
* Each panel follows the same steps:
*   1. regress every variable of the panel on treatment_pooled with strata
*      indicators and robust standard errors, keep only the treatment_pooled
*      row, and append it to a stored table of differences;
*   2. run a logit of treatment_pooled on all variables of the panel plus the
*      strata indicators and keep the formatted e(p) in a local;
*   3. write the accumulated table to the balance file in the tables folder.
* The file is written three times with replace, so only the last call decides
* what remains on disk.

	***Demographics
	
		*By pooled treatment arms
		* i numbers the stored one-row tables row1, row2, ...
		local i = 1
		foreach var in $demo {
			* Difference between arms, controlling for strata
			qui reg `var' treatment_pooled i.$strata, r
			* Store the treatment_pooled row, titled with the variable label
			outreg, replace nodisplay sdec(2,3) ///
						 keep(treatment_pooled) se noautosumm starlevels(10 5 1) starloc(1) nosubstat ///
						 store(row`i') ///
						 rtitles("`: var label `var''")		
			* Append the new row to the running table of differences
			outreg, replay(diff_demo) append(row`i') ctitles("", "$ \Delta$", "") multicol(1,2,2) store(diff_demo) nodisplay
			local ++i
		}
		outreg, replay(diff_demo)
		
		*Joint exogeneity test
		* NOTE(review): the p-value is e(p) from a logit, while the table row calls it an F-test - confirm the wording
		qui logit treatment_pooled $demo i.$strata, r
		local fp_demo : di %9.3f `e(p)'
		
		*Export balance table
		* Summary statistics merged with the differences and stored as balance;
		* the added rows are the spacer and the Panel B heading
		outreg using "$tables/balance", replace tex fr ///
			replay(sumstat_demo) merge(diff_demo) store(balance) ///
			ctitles("", Control, "", "", Treatment, "", "", "Difference", "" \ "", Mean, S.D., N, Mean, S.D., N, "$ \Delta$", S.E. \ "\bf{Panel A: Demographics}", "", "", "", "", "", "", "", "") ///
			addrows("", "", "", "", "", "", "", "", "" \ "\bf{Panel B: SMS-related}", "", "", "", "", "", "", "", "" \ "", "", "", "", "", "", "", "", "")
	
	***SMS-specific variables
	
		*By pooled treatment arms
		local i = 1
		foreach var in $sms {
			* Difference between arms, controlling for strata
			qui reg `var' treatment_pooled i.$strata, r
			* Store the treatment_pooled row, titled with the variable label
			outreg, replace nodisplay sdec(2,3) ///
						 keep(treatment_pooled) se noautosumm starlevels(10 5 1) starloc(1) nosubstat ///
						 store(row`i') ///
						 rtitles("`: var label `var''")		
			* Append the new row to the running table of differences
			outreg, replay(diff_sms) append(row`i') ctitles("", "$ \Delta$", "") multicol(1,2,2) store(diff_sms) nodisplay
			local ++i
		}
		* NOTE(review): this merge has no store() option - confirm that the append further down still picks up the difference columns
		outreg, replay(sumstat_sms) merge(diff_sms)
		
		*Joint exogeneity test
		* NOTE(review): delivered was set to 0 for every control observation earlier in this file,
		* so a nonzero value implies treatment; check how logit handles it and which sample is used
		qui logit treatment_pooled $sms delivered i.$strata, r
		local fp_sms : di %9.3f `e(p)'
		
		*Export balance table
		* Adds the SMS panel below the stored balance table; the admin delivery
		* rows are filled from the del_ and del_c_ locals computed above
		outreg using "$tables/balance", replace tex fr replay(balance) append(sumstat_sms) ///
			addrows("Any SMS delivered (Admin)", "", "", "", "`del_mean'", "`del_sd'", "`del_n'", "", "" \ "$ #$ SMS delivered (Admin)", "", "", "", "`del_c_mean'", "`del_c_sd'", "`del_c_n'", "", "" \ "", "", "", "", "", "", "", "", "" \ "\bf{Panel C: Health}", "", "", "", "", "", "", "", "" \ "", "", "", "", "", "", "", "", "")
			
			
	***Health-specific variables
	
		*By pooled treatment arms
		local i = 1
		foreach var in $health {
			* Difference between arms, controlling for strata
			qui reg `var' treatment_pooled i.$strata, r
			* Store the treatment_pooled row, titled with the variable label
			outreg, replace nodisplay sdec(2,3) ///
						 keep(treatment_pooled) se noautosumm starlevels(10 5 1) starloc(1) nosubstat ///
						 store(row`i') ///
						 rtitles("`: var label `var''")		
			* Append the new row to the running table of differences
			outreg, replay(diff_health) append(row`i') ctitles("", "$ \Delta$", "") multicol(1,2,2) store(diff_health) nodisplay
			local ++i
		}
		* NOTE(review): no store() option here either - same check as for the SMS panel
		outreg, replay(sumstat_health) merge(diff_health)
		
		*Joint exogeneity test
		qui logit treatment_pooled $health i.$strata, r
		local fp_health : di %9.3f `e(p)'
		
		*Export balance table
		* Final write: health panel appended, the three joint-test p-values added
		* as extra rows, then rules, spanning cells and fonts applied
		outreg using "$tables/balance", replace tex fr ///
			replay(balance) append(sumstat_health) ///
			addrows("\bf{Joint significance F-test}", "", "", "", "", "", "", "", "" \ "Panel A \it{(p-value)}", `fp_demo', "", "", "", "", "", "", "" \ "Panel B \it{(p-value)}", `fp_sms', "", "", "", "", "", "", "" \ "Panel C \it{(p-value)}", `fp_health', "", "", "", "", "", "", "") ///
			hlines(1{0}1;{0};{0};{0}10001) multicol(1,2,3;1,5,3;1,8,2;3,1,9;19,1,9;28,1,9) note("") $font

********************************************************************************
***NUMBER OF INTERVIEWS BY TREATMENT ARMS AND BEHAVIOR AND ROUNDS
********************************************************************************

***Column titles, row titles and font options for the sample tables
global ctitles_sample " "", Round, "", "", "", "", "", "", "", "", "", "", "", "", "", Total \ "", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, (by arm)"
global rtitles_sample "Neutral (2xmorn.) \ Public gain (2xmorn.) \ Public loss (2xmorn.) \ Private gain (2xmorn.) \ Private loss (2xmorn.) \ Neutral (morn./eve.) \ Public gain (morn./eve.) \ Public loss (morn./eve.) \ Private gain (morn./eve.) \ Private loss (morn./eve.) \ Total (by round)"
global font_sample "basefont(footnotesize) statfont(footnotesize; footnotesize; footnotesize; footnotesize) rtitlfont(footnotesize; footnotesize; footnotesize; footnotesize) ctitlfont(footnotesize) notefont(scriptsize)"

	* One table per behavior: code 1 is written with suffix sd, code 2 with suffix hw
	local beh = 0
	foreach k in sd hw {

		local ++beh

		* Sums of the arm indicators and of treatment_pooled within each interview round
		estpost tabstat treatment_arm_2-treatment_arm_11 treatment_pooled if behavior==`beh', by(roundofinterview) stat(sum) casewise

		* Stack the stored vectors: arms 2 to 11 first, pooled treatment as the last row
		matrix sample_`k' = e(treatment_arm_2)
		forvalues a = 3/11 {
			matrix sample_`k' = (sample_`k' \ e(treatment_arm_`a'))
		}
		matrix sample_`k' = (sample_`k' \ e(treatment_pooled))

		frmttable using "$tables/sample_`k'", replace tex fr statmat(sample_`k') sdec(0,0,0,0,0,0,0,0,0,0) rtitles($rtitles_sample) ctitles($ctitles_sample) multicol(1,2,14) $font_sample

	}
	
	
********************************************************************************
***ATTRITION ANALYSIS
********************************************************************************

***Create dummies with treatment arms interacted with behavior
*  The names of the generated variables are collected in made_sd and made_hw
*  so they can serve as the regressor lists below.
		local made_sd
		local made_hw
		foreach v in $treat_pool {
			gen `v'_sd = `v'*behavior_sd
			gen `v'_hw = `v'*behavior_hw
			local made_sd `made_sd' `v'_sd
			local made_hw `made_hw' `v'_hw
		}

*  The regressor lists come from the globals treat_pool_sd and treat_pool_hw.
*  Stata reads those as two separate global names, not as treat_pool plus a
*  suffix, and this file never defines them. If either one is empty the
*  regression would silently run without treatment regressors, so fall back to
*  the variables generated just above.
		local attr_sd $treat_pool_sd
		local attr_hw $treat_pool_hw
		if "`attr_sd'" == "" local attr_sd `made_sd'
		if "`attr_hw'" == "" local attr_hw `made_hw'

* Completion on treatment by behavior cells, robust standard errors
reg complete treatment_pooled#behavior, robust
* Completion on the behavior-specific treatment dummies, study controls absorbed
reghdfe complete `attr_sd' `attr_hw', a($studycontrols) vce(robust)
* Completion on pooled treatment alone, study controls absorbed
reghdfe complete treatment_pooled, a($studycontrols) vce(robust)

********************************************************************************
***CORRELATION MATRIX
********************************************************************************

* Pairwise correlations among the listed outcome variables
local corrvars know_sd know_hw act_sd act_hw contact_3 washhands_1 list_sd list_hw
pwcorr `corrvars'

* Close the log and stop the do-file, discarding the data in memory
log close
exit, clear
